home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
BBS in a Box 7
/
BBS in a Box - Macintosh - Volume VII (BBS in a Box) (January 1993).iso
/
Files
/
Prog
/
Q-R
/
R⁄O strsml.cpt
/
Ratcliff_Obershelp
/
strsml2.c
< prev
Wrap
C/C++ Source or Header
|
1989-02-07
|
2KB
|
114 lines
#include <string.h>
#include <strings.h>
/* Number of entries currently held on the work stack below; it is also the
   index of the next free slot (pushst stores at this index, then increments). */
int stcknum;
/* Work stack of substring pairs, kept as four parallel arrays of 26 slots.
   Slot k holds the first/last character pointers of a substring of string 1
   (ststr1l[k] / ststr1r[k]) and of string 2 (ststr2l[k] / ststr2r[k]). */
char *ststr1l[26], *ststr2l[26], *ststr1r[26], *ststr2r[26];
/*
simil(str1, str2) - returns a value signifying how similar two
strings are (0 - completely different, 100 - exactly the same)
using the Ratcliff/Obershelp pattern matching algorithm.
(Code courtesy of Joe Preston published in DDJ, Nov 1988, #145, pg.12, 118).

The longest common substring of the two strings is located with compare();
the pieces to its left and to its right are then queued on the work stack
and processed the same way. The score is twice the total number of matched
characters, scaled to 0..100 by the combined length of both strings.

Returns 0 if either argument is a null pointer or an empty string.
Uses the file-level work stack (stcknum / ststr*), so it is not reentrant.
*/
int compare(char **si, char **se, char **di, char **de);
int pushst(char *si, char *se, char *di, char *de);
int popst(char **si, char **se, char **di, char **de);

int simil(char *str1, char *str2)
{
    int len1, len2, ncmp, score;
    char *di, *si, *de, *se, *cl1, *cl2, *cr1, *cr2;

    score = stcknum = 0;
    if (!str1 || !str2)
        return (score);

    len1 = (int)strlen(str1);
    len2 = (int)strlen(str2);
    if (len1 == 0 || len2 == 0)
        return (score);

    /* Ranges on the stack are inclusive: [first char, last char]. */
    pushst(str1, str1 + len1 - 1, str2, str2 + len2 - 1);
    while (stcknum != 0)
    {
        popst(&si, &se, &di, &de);

        /* Remember the outer bounds; compare() narrows si/se/di/de to the match. */
        cl1 = si;
        cl2 = di;
        cr1 = se;
        cr2 = de;

        if ((ncmp = compare(&si, &se, &di, &de)) != 0)
        {
            score += ncmp * 2;

            /* Left remainders: both must be non-empty. If each is exactly one
               character they cannot be equal (the match would have extended
               over them), so that case is skipped. */
            if (cl1 != si && cl2 != di && (cl1 != si - 1 || cl2 != di - 1))
                pushst(cl1, si - 1, cl2, di - 1);

            /* Right remainders: same rule as for the left side. */
            if (se != cr1 && de != cr2 && (se + 1 != cr1 || de + 1 != cr2))
                pushst(se + 1, cr1, de + 1, cr2);
        }
    }
    return (100 * score / (len1 + len2));
}
/*
compare(si, se, di, de) - returns the largest number of characters common to both strings.

On entry *si..*se and *di..*de are the inclusive first/last character
pointers of the two substrings to examine. The longest run of characters
that appears contiguously in both is searched for; on ties the run starting
earliest in the first substring (then earliest in the second) wins.

If a common run is found, *si/*se and *di/*de are narrowed to the first and
last character of that run in each string and its length is returned.
If nothing matches (or either range is empty) 0 is returned and the four
pointers are left unchanged.
*/
int compare(char **si, char **se, char **di, char **de)
{
    char *s1 = *si;
    char *s2 = *di;
    int len1 = (int)(*se - *si) + 1;
    int len2 = (int)(*de - *di) + 1;
    int maxchars = 0;
    int best1 = 0, best2 = 0;
    int a, b, l;

    /* A start position is only worth trying while more than maxchars
       characters remain after it; anything shorter cannot beat the best. */
    for (a = 0; a + maxchars < len1; a++)
    {
        for (b = 0; b + maxchars < len2; b++)
        {
            /* Length of the common run starting at s1[a] / s2[b], bounded
               by the end of both ranges. */
            l = 0;
            while (a + l < len1 && b + l < len2 && s1[a + l] == s2[b + l])
                l++;

            /* Every start position is examined: skipping ahead by the run
               length could jump over a longer run that begins inside it. */
            if (l > maxchars)
            {
                maxchars = l;
                best1 = a;
                best2 = b;
            }
        }
    }

    if (maxchars > 0)
    {
        *si = s1 + best1;
        *se = s1 + best1 + maxchars - 1;
        *di = s2 + best2;
        *de = s2 + best2 + maxchars - 1;
    }
    return (maxchars);
}
/*
pushst(si, se, di, de) - pushes a pair of substrings onto the work stack.

si/se are stored as the first/last pointers for string 1 and di/de as the
first/last pointers for string 2, in the slot indexed by stcknum, which is
then incremented.

Returns 0 on success. If the stack is already full (or stcknum is negative)
nothing is stored and -1 is returned, instead of writing past the end of
the stack arrays.
*/
int pushst(char *si, char *se, char *di, char *de)
{
    /* Capacity is taken from the array itself so it tracks its declaration. */
    if (stcknum < 0 || stcknum >= (int)(sizeof ststr1l / sizeof ststr1l[0]))
        return (-1);

    ststr1l[stcknum] = si;
    ststr1r[stcknum] = se;
    ststr2l[stcknum] = di;
    ststr2r[stcknum] = de;
    stcknum++;
    return (0);
}
/*
popst(si, se, di, de) - pops the most recently pushed pair of substrings.

Decrements stcknum and copies the slot it then indexes into *si/*se
(first/last pointers for string 1) and *di/*de (first/last pointers for
string 2).

Returns 0 on success. If the stack is empty nothing is written, stcknum is
left alone and -1 is returned, instead of reading before the start of the
stack arrays.
*/
int popst(char **si, char **se, char **di, char **de)
{
    if (stcknum <= 0)
        return (-1);

    stcknum--;
    *si = ststr1l[stcknum];
    *se = ststr1r[stcknum];
    *di = ststr2l[stcknum];
    *de = ststr2r[stcknum];
    return (0);
}